***********************************************************************************
***	Replication file for:                                                     	***
*** Braun, S. T. and Stuhler, J. (2024). The Economic Consequences of 			***
***	Being Widowed by War: A Life-Cycle Perspective.	     						***
***																				***
***	Journal of Public Economics                                                 ***
***   							                                                ***
*** Script:		_20-ghs-1-2-prep.do					   					 		***	
*** Purpose:	merge GHS-1 and GHS-2 data										***
***																				***
***********************************************************************************


***********************
** Merge data 
***********************

*** GHS-1 life-cycle/panel data
* `data' is a source tag ("LVS-1" / "LVS-2"); it is used below to decide which rows take
* their values from the LVS-2 cross-section variables.
use "$widowsghs/processed/LVS1_panel_data_IP.dta", clear
gen data = "LVS-1"

*** Merge with selected cross-sectional characteristics (LVS-1)
* update: missing values in the panel data are filled from the cross-section file.
* NOTE(review): this path is spelled "Intermediate" while the parental-death merge below is
* spelled "intermediate". On a case-sensitive file system only one spelling can resolve -- confirm.
merge m:1 fid using "$widowsghs/processed/Intermediate/LVIPa_CS_combined_edt.dta", update nogenerate ///
	  keepusing(geschlecht schuljahre_o_uni_1001 schuljahre_m_uni_1001 ausbildungsjahre_1001 beruf_prestige_max_1001 geburtsjahr_1001 ///
		  		schuljahre_o_uni_1002 schuljahre_m_uni_1002 ausbildungsjahre_1002 geburtsjahr_1002 ///
		  		anz_geschwister)

*** Merge with data on parental deaths
merge m:1 fid using "$widowsghs/processed/intermediate/LV1_shock.dta", nogenerate

* Rows that exist only in the two merged-in files enter with an empty source tag.
* Tag them as LVS-1 here; otherwise the LVS-2 tagging after the append would pick them up.
replace data = "LVS-1" if data == ""

*** Append LVS-2 panel data
append using "$widowsghs/processed/LVS2_panel_data_IP.dta"
replace data = "LVS-2" if data == ""

*** Merge with selected cross-sectional characteristics (LVS-2)
merge m:1 fid using "$widowsghs/processed/LVII_CS_combined_edt.dta", update nogenerate ///
	  keepusing(geburtsjahr_2001 geschlecht_2001 ///
	  heiratsjahr_2002 todesjahr_2002 ///
	  schuljahre_o_uni_1001 schuljahre_m_uni_1001 ausbildungsjahre_1001 beruf_prestige_max_tr_1001 geburtsjahr_1001 ///
	  schuljahre_o_uni_1002 schuljahre_m_uni_1002 ausbildungsjahre_1002 geburtsjahr_1002 ///
	  anz_geschwister_2001)

* Unify variable names: LVS-2 rows take their values from the *_2001 variables
replace geburtsjahr = geburtsjahr_2001 if data == "LVS-2"
replace geschlecht = geschlecht_2001 if data == "LVS-2"
replace anz_geschwister = anz_geschwister_2001 if data == "LVS-2"

drop geschlecht_2001 geburtsjahr_2001 anz_geschwister_2001

* LVS-2 rows take the maximum-prestige value from the *_tr_* variable
replace beruf_prestige_max_1001 = beruf_prestige_max_tr_1001 if data == "LVS-2"
drop beruf_prestige_max_tr_*


***********************
** Define cohorts
***********************


* Birth cohort of the index person: three-year windows around the listed centre years
*   1 = 1919-1921 (LVS-2, according to their numeration)
*   2 = 1929-1931, 3 = 1939-1941, 4 = 1949-1951 (LVS-1)
*   5 = 1954-1956, 6 = 1959-1961
* Birth years outside these windows leave kohorte missing.
gen kohorte = .
local k = 0
foreach mid of numlist 1920 1930 1940 1950 1955 1960 {
	local ++k
	replace kohorte = `k' if inrange(geburtsjahr, `mid' - 1, `mid' + 1)
}

order kohorte, after(geburtsjahr)

label variable kohorte "Birth cohort"


***********************
** Recode and clean up life-cycle variables
***********************

* Keep only observations with a non-negative age (system-missing ages are removed as well)
keep if alter_j >= 0 & alter_j != .

* Spell variables: a missing value is read as "no spell recorded at t" and set to zero
* (a) variables filled for all observations
foreach v of varlist dienst_t erwerbstaetig_t schulzeit_t {
	replace `v' = 0 if `v' == . & !missing(alter_j) & alter_j >= 0
}
* (b) variables filled for Cohorts 2-4 only
foreach v of varlist unterbrechung_p1_t unterbrechung_p2_t {
	replace `v' = 0 if `v' == . & !missing(alter_j) & alter_j >= 0 & inlist(kohorte, 2, 3, 4)
}
* (c) variables filled for Cohort 1 only
foreach v of varlist luecke_schule_erwerbs_t erwerbsluecke_t {
	replace `v' = 0 if `v' == . & !missing(alter_j) & alter_j >= 0 & kohorte == 1
}


* Illness
* The distinction between chronic and acute illness exists only for Cohort 1.
* Plus: no spell data for a subset of Cohort 1 (LVS2TA), as only the spell beginning was recorded.
* Here: both types are merged into (general) illness for Cohort 1 rows with lv2 == "a";
* illness_t is not touched for the remaining Cohort 1 rows.

* Cohort 1, lv2 == "a": missing chronic/acute indicators count as "no illness spell"
foreach v of varlist illness_chronic_t illness_acute_t {
	replace `v' = 0 if `v' == . & kohorte == 1 & lv2 == "a" & !missing(alter_j) & alter_j >= 0
}

* Cohorts 2-4: missing general illness indicator counts as "no illness spell"
replace illness_t = 0 if illness_t == . & inlist(kohorte, 2, 3, 4) & !missing(alter_j) & alter_j >= 0

* Cohort 1, lv2 == "a": general illness = 1 if either type is present (sum capped at 1)
replace illness_t = min(illness_acute_t + illness_chronic_t, 1) if kohorte == 1 & lv2 == "a" & !missing(alter_j) & alter_j >= 0

drop illness_chronic_t illness_acute_t


* Occupational prestige

* General rules as in the EJ paper:
*   - prestige is missing if the occupation is missing, the person is still in training or retired,
*     or the occupation is indeterminable or inadequately described
*   - prestige is zero for job seekers and non-employed housewives

* Here: use the Treiman scale and set prestige to zero if not working at time t

* Cohorts 2-4 take their prestige score from the Treiman variable
replace beruf_prestige_t = prestige_beruf_trei_t if inrange(kohorte, 2, 4)
* Missing prestige while not employed at t becomes zero
replace beruf_prestige_t = 0 if beruf_prestige_t == . & erwerbstaetig_t == 0
* Code 99 is not a defined value
replace beruf_prestige_t = . if beruf_prestige_t == 99

* Separation from father (trennva_t) / mother (trennma_t)
* Zero-filled only for ages 0-16 in Cohorts 2-4; set to missing above age 16
* (the variables are already less populated at alter_j == 16)
foreach v of varlist trennva_t trennma_t {
	replace `v' = 0 if `v' == . & inrange(alter_j, 0, 16) & inlist(kohorte, 2, 3, 4)
	replace `v' = . if alter_j > 16
}


* Number of the marriage
* Carry the previous value forward within person when missing (=> for older people).
* replace works through the observations in order, so runs of missings are filled as well.
bysort fid (t): replace ehenr_t = ehenr_t[_n-1] if ehenr_t == . & _n > 1
* Whatever is still missing in Cohort 1 becomes 0
replace ehenr_t = 0 if kohorte == 1 & ehenr_t == .

* School spells
* Unify variable names: schulzeit_t (Cohorts 2-4) and schulspell_t (Cohort 1) -> schule_t

* Missing means "not in school" within the cohorts each source variable covers
replace schulspell_t = 0 if kohorte == 1 & schulspell_t == .
replace schulzeit_t = 0 if inrange(kohorte, 2, 4) & schulzeit_t == .

* Combined indicator; stays missing for observations outside Cohorts 1-4
gen schule_t = schulzeit_t if inrange(kohorte, 2, 4)
replace schule_t = schulspell_t if kohorte == 1
label variable schule_t "School attendance at time t?"

drop schulspell_t schulzeit_t

order schule_t, after(___Schooling______________)


* Vocational training and hours per week
* Unify variable names: Cohort 1 values come from ausbildungszeit_t / wochenstunden_t

* Training: copy the Cohort 1 values, then treat any remaining missing as "not in training"
replace ausbildung_t = ausbildungszeit_t if kohorte == 1
replace ausbildung_t = 0 if ausbildung_t == .

* Hours per week: copy the Cohort 1 values (missings are kept)
replace wochenstunden_beruf_t = wochenstunden_t if kohorte == 1

* The Cohort 1 source variables are no longer needed
drop ausbildungszeit_t wochenstunden_t

* Full-time employed (currently disabled)
*gen fulltime_t = 1 if (erwerbstaetig_t==1 & wochenstunden_beruf_t >=35)
*replace fulltime_t = 0 if (erwerbstaetig_t==0 | (erwerbstaetig_t == 1 & wochenstunden_beruf_t < 35 & wochenstunden_beruf_t != .))

* Indicator variables for occupation and sector:
* one dummy per category, named d_occupation_t_# and d_sector_t_#
tabulate occupation_7_t, generate(d_occupation_t_)
tabulate sector_3_t, generate(d_sector_t_)
*gen outside_agriculture_t = 1-d_sector_t_1

* There are too many zeroes at the end of the observation period -> recode to missing after the apparent censoring point
* Cohort 1: First interview year is 1985 -- Pilotstudie 1984/5 // Cohorts 2-4: First interview year is 1981 -- Vorstudie 1979/80
* Age cut-offs applied below: Cohort 1 > 65 (> 64 for those born 1921 with lv2 == "a"), Cohort 2 > 49, Cohort 3 > 39, Cohort 4 > 29
* Each variable is listed once; the opening brace is on the foreach line.
foreach var of varlist anzahl_kinder_t beruf_prestige_t erwerbstaetig_t illness_t ausbildung_t ///
		unterbrechung_p1_t unterbrechung_p2_t schule_t {
	replace `var'=. if kohorte == 1 & alter_j>65
	replace `var'=. if geburtsjahr == 1921 & lv2=="a" & alter_j>64	// lv2a were already interviewed in 1985/86
	replace `var'=. if kohorte == 2 & alter_j>49
	replace `var'=. if kohorte == 3 & alter_j>39
	replace `var'=. if kohorte == 4 & alter_j>29
}



*******************************
** Order
*******************************

* Source indicator directly after the calendar year
order lv2, after(jahr)

* Occupation block: occupation, prestige, occupation dummies, sector dummies
order beruf_t beruf_prestige_t ///
	d_occupation_t_1 d_occupation_t_2 d_occupation_t_3 d_occupation_t_4 ///
	d_occupation_t_5 d_occupation_t_6 d_occupation_t_7 ///
	d_sector_t_1 d_sector_t_2 d_sector_t_3, ///
	after(grund_taet_wechsel_t)
*order leiden_koeper1_acute_t leiden_koeper2_acute_t leiden_funkt1_chr_t leiden_funkt2_chr_t leiden_koeper1_chronic_t leiden_koeper2_chronic_t leiden_funkt1_chronic_t leiden_funkt2_chronic_t, after(leiden_funktion_t)

* Gap variables
order luecke_schule_erwerbs_t luecke_se_aktivitität_t ///
	erwerbsluecke_t erwerbsluecke_taetigkeit_t ///
	grund1_erwerbsluecke_t grund2_erwerbsluecke_t, ///
	after(aktivitaet_luecke_p2_t)

* School details directly after the unified school indicator
order schulart_t klasse_t schulabschluss_t, after(schule_t)

* Demographics and age directly after the calendar year
order ___Demographics_____ geburtsjahr kohorte geschlecht anz_geschwister ///
	___Age___________________ alter_t alter_j, ///
	after(jahr)

*******************************
** Drop superfluous variables
*******************************

* Section-marker variables, the source tag, and two variables not carried into the final file
drop ___Panel_Data______________ data ___IDs______________ ///
	grund_taet_wechsel_t ausbildungsabschluss_t


***********************
** Save
***********************

* Reduce storage types where possible, then write the merged file (overwrites an existing one)
compress
save "$widowsghs/processed/GHS_widows_data_main.dta", replace

